Q1

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
hist <- readRDS('historical_web_data_26112015.rds')

# 3-D scatter of quake epicentres; marker size encodes magnitude.
plot_ly(
  x = hist$Longitude,
  y = hist$Latitude,
  z = hist$Depth,
  size = hist$Magnitude
) %>%
  add_markers() %>%
  layout(scene = list(
    xaxis = list(title = 'Longitude'),
    yaxis = list(title = 'Latitude'),
    zaxis = list(title = 'Depth')
  ))

Q2

library(readr)
library(mapdata)
library(ggplot2)
library(gganimate)
library(ggmap)
# Tsunami-generating earthquakes of magnitude > 7, animated over magnitude.
disaster <- read_delim('disaster.txt', '\t', escape_double = FALSE, trim_ws = TRUE)

# Keep tsunami events with a known primary magnitude above 7.
tsunami <- disaster %>%
  filter(FLAG_TSUNAMI == 'Tsu') %>%
  filter(!is.na(EQ_PRIMARY)) %>%
  filter(EQ_PRIMARY > 7)

world_map <- map_data('world')

ggplot() +
  geom_polygon(data = world_map, aes(x = long, y = lat, group = group)) +
  coord_fixed(1.2) +
  geom_point(data = tsunami, aes(x = LONGITUDE, y = LATITUDE),
             color = 'red', size = 0.5) +
  transition_time(EQ_PRIMARY) +
  ease_aes('linear') +
  labs(title = 'MAGNITUDE {frame_time}')

Q3

# 2-D kernel-density heat map of earthquake epicentres on the Tehran basemap.
iran_er <- readRDS('iran_earthquake.rds')
iran_map <- read_rds("Tehrn_map_6.rds")

# `..level..` is deprecated (ggplot2 >= 3.4.0); after_stat(level) is the
# supported way to map the computed contour level onto fill.
ggmap(iran_map) +
  stat_density_2d(
    geom = 'polygon',
    data = iran_er,
    aes(x = Long, y = Lat, fill = after_stat(level))
  )
## Warning: Removed 243 rows containing non-finite values (stat_density2d).

Q4

# Fraction of large Iranian quakes (M > 6.5) followed by another within
# 6 years: the share of consecutive-event year gaps that are <= 6.
iran_dis <- disaster %>% filter(COUNTRY == 'IRAN' & EQ_PRIMARY > 6.5)

# diff() gives the gap between consecutive YEAR values directly, replacing
# the manual shifted-row subtraction; assumes `disaster` rows are already
# ordered by YEAR (true for this dataset) — same 0.5185185 result.
year_gap <- diff(iran_dis$YEAR)
cat(mean(year_gap <= 6))
## 0.5185185

Q5

library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
# Per-country average and total earthquake deaths, joined onto the world map.
country_average <- disaster %>%
  group_by(COUNTRY) %>%
  summarise(
    avgDeath = mean(DEATHS, na.rm = TRUE),
    totalDeath = sum(DEATHS, na.rm = TRUE)
  ) %>%
  rename(country = COUNTRY)

mapDevice('x11')
heatMap <- joinCountryData2Map(country_average, joinCode = "NAME",
                               nameJoinColumn = "country")
## 138 codes from your data successfully matched countries in the map
## 16 codes from your data failed to match with a country code in the map
## 105 codes from the map weren't represented in your data
# Choropleth of total earthquake deaths per country on the joined map.
mapCountryData(heatMap, nameColumnToPlot="totalDeath")
## You asked for 7 quantiles, only 5 could be created in quantiles classification

Q6

# Linear model: deaths as a function of location, focal depth and magnitude.
model <- lm(DEATHS ~ LONGITUDE + LATITUDE + FOCAL_DEPTH + EQ_PRIMARY,
            data = disaster)
summary(model)
## 
## Call:
## lm(formula = DEATHS ~ LONGITUDE + LATITUDE + FOCAL_DEPTH + EQ_PRIMARY, 
##     data = disaster)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -12556  -3519  -1872     35 311710 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15451.303   3081.358  -5.014 6.13e-07 ***
## LONGITUDE       -1.232      5.813  -0.212  0.83226    
## LATITUDE        64.237     21.852   2.940  0.00335 ** 
## FOCAL_DEPTH    -21.929     11.354  -1.931  0.05367 .  
## EQ_PRIMARY    2678.740    464.175   5.771 1.01e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15440 on 1178 degrees of freedom
##   (4843 observations deleted due to missingness)
## Multiple R-squared:  0.031,  Adjusted R-squared:  0.0277 
## F-statistic:  9.42 on 4 and 1178 DF,  p-value: 1.7e-07

Q7

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(stringr)
# Q7: predict the day's biggest quake per country from the second-biggest
# quake that same country and day.
worldwide = read.csv('worldwide.csv')
worldwide$date = as_date(worldwide$time)
worldwide$day = day(worldwide$date)
worldwide$year = year(worldwide$date)
worldwide$month = month(worldwide$date)
# The last comma-separated token of `place` is taken as the country name.
worldwide$country = sapply(worldwide$place, function(X){
  tail(str_split(X, ", ")[[1]], 1)
})

# Largest event for each (country, day, month, year) group.
biggest = worldwide %>% group_by(country, day, month, year) %>% filter(mag == max(mag))

# Second-largest event per group.
# NOTE(review): rank() averages ties, so a group whose two largest events
# share the same magnitude gets ranks 1.5/1.5 and no rank == 2 row — such
# groups are silently dropped. Confirm whether min_rank() was intended.
secondBiggest = worldwide %>% group_by(country, day, month, year) %>%
  mutate(rank = rank(desc(mag))) %>%
  arrange(rank) %>% filter(rank == 2)

# Pair second-biggest (mag.x) with biggest (mag.y) per group.
joined = inner_join(secondBiggest, biggest, by = c('country', 'day', 'month', 'year'))

# Sequential (not randomised) 80/20 train/test split.
train = joined[1:as.integer(0.8 * nrow(joined)),]
test = joined[-(1:as.integer(0.8 * nrow(joined))),]

# Regress the biggest magnitude on the second-biggest.
reg = lm(data = train, mag.y ~ mag.x)
summary(reg)
## 
## Call:
## lm(formula = mag.y ~ mag.x, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39162 -0.28967 -0.09111  0.11033  2.60982 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.38649    0.02342    16.5   <2e-16 ***
## mag.x        1.00103    0.00583   171.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3756 on 6926 degrees of freedom
## Multiple R-squared:  0.8098, Adjusted R-squared:  0.8097 
## F-statistic: 2.948e+04 on 1 and 6926 DF,  p-value: < 2.2e-16
# Distribution of the prediction error on the held-out rows.
prediction_error <- stats::predict(reg, test) - test$mag.y
summary(prediction_error)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.508789 -0.109405  0.091314  0.006798  0.290287  0.391211

Q8

چون مقدار همبستگی به‌دست‌آمده از آزمون کم است (حدود ۰٫۲)، وابستگی خطی میان این دو مقدار ضعیف است.

# cor.test's `method` argument takes a single choice; passing all three
# method names only runs the first via match.arg(), so the kendall and
# spearman entries were silently ignored. Name "pearson" explicitly.
cor.test(worldwide$depth, worldwide$mag, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  worldwide$depth and worldwide$mag
## t = 50.03, df = 60629, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1914584 0.2067469
## sample estimates:
##       cor 
## 0.1991147

Q9

library(lubridate)
# Mean yearly earthquake count per region (last token of `place`), top 10.
worldwide <- read.csv('worldwide.csv')
worldwide$place <- sub(".*, ", "", worldwide$place)
worldwide$time <- as.Date(worldwide$time)

worldwide <- worldwide %>%
  mutate(year = year(time)) %>%
  group_by(place, year) %>%
  summarise(count = n()) %>%
  group_by(place) %>%
  summarise(cnt = mean(count))

ww <- worldwide %>% arrange(desc(cnt)) %>% top_n(10)
## Selecting by cnt
# Bar chart of mean yearly counts, regions ordered by frequency.
ggplot(ww, aes(reorder(place, cnt), cnt)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Q10

در قسمت اول بیشترین تعداد زلزله بالای ۳ ریشتر در استان‌های کشور به دست آمده است. در قسمت دوم تعداد زلزله‌های بالای ۶ ریشتر از سال ۲۰۰۶ به بعد در ایران نشان داده شده است. در قسمت سوم بیشترین تعداد زلزله با بزرگی بیشتر از ۸ ریشتر به تفکیک کشور نشان داده شده است.

# Top-10 Iranian provinces by number of M > 3 quakes in the 2015 catalogue.
iran2015 <- hist %>%
  filter(Magnitude > 3) %>%
  count(Province, name = 'cnt') %>%
  arrange(desc(cnt)) %>%
  top_n(10)
## Selecting by cnt
# Bar chart of quake counts per province, ordered by frequency.
ggplot(iran2015, aes(reorder(Province, cnt), cnt)) + geom_col()

# Number of M > 6 events in the Iran catalogue.
filter(iran_er, Mag > 6) %>% summarise(cnt = n())
##   cnt
## 1  13
# Top-10 countries by number of M > 8 earthquakes.
bigearth <- disaster %>%
  filter(EQ_PRIMARY > 8) %>%
  count(COUNTRY, name = 'cnt') %>%
  arrange(desc(cnt)) %>%
  top_n(10)
## Selecting by cnt
# Bar chart of M > 8 quake counts per country, ordered by frequency.
ggplot(bigearth, aes(reorder(COUNTRY, cnt), cnt)) + geom_col()